import org.apache.spark.sql.SparkSession

/** Minimal Spark demo: keys each sentence by its first word and prints the pairs. */
object ad {

  def main(args: Array[String]): Unit = {
    // Build a local SparkSession for this standalone demo.
    val spark = SparkSession.builder()
      .appName("Test")
      .master("local[*]")
      .getOrCreate()
    val sc = spark.sparkContext

    try {
      // Create an RDD from an in-memory list of sentences via parallelize().
      val rdd = sc.parallelize(
        List("this is a test", "how are you", "i am fine", "can you tell me"))
      // Pair each sentence with its first word: (firstWord, sentence).
      // split(" ")(0) is safe here because every input sentence is non-empty.
      val words = rdd.map(line => (line.split(" ")(0), line))
      // Single action: collect once and print each (key, sentence) pair.
      // (Previously collect was invoked twice — the first run's result was
      //  discarded, executing the whole job redundantly.)
      words.collect().foreach(println)
    } finally {
      // Always release Spark resources, even if the job fails.
      spark.stop()
    }
  }
}
